Load libraries and data (eye movements, click data, production data). Click data taken from Data/InterpretationTlessC_01262017/Jan-26-2017-Batch_2666254_batch_results_intp_preprocessed.csv. Production data taken from Data/ImprecisionPracticeListenerFreeProdFull40trials 10-5-17imprecision_freeprod_oct2017_120participants_117native_prodprobs.csv.
# Load packages. Use library() rather than require() so that a missing
# package stops the analysis immediately instead of returning FALSE
# and failing later with confusing errors.
library(tidyverse)
library(forcats)
library(scales)
##
## Attaching package: 'scales'
## The following object is masked from 'package:purrr':
##
## discard
## The following object is masked from 'package:readr':
##
## col_factor
# knitr for report rendering; helpers.R supplies ci.low()/ci.high()
# (used for the confidence intervals below).
library(knitr)
source("helpers.R")

# Read the three raw datasets: eye movements, referent-click decisions,
# and production probability estimates.
eyedat <- read.csv("../data/eyedata.csv")
clickdat <- read.csv("../data/clickdata.csv")
proddat <- read.csv("../data/production.csv")
Reformat eye movement data so it can be merged.
# Keep only the columns needed for merging; rename the looking measures
# inside select() and collapse the "distractor.contrast" region label
# into "contrast" so it matches the click data coding.
ed <- eyedat %>%
  select(condition, itemid, Window, Region,
         freq.eye = totalLooks, prop.eye = Proportion) %>%
  mutate(Region = recode(Region, distractor.contrast = "contrast"))
Reformat click data and compute belief distributions by scene/condition/window combination. Join eye and click data and assign zero probabilities to NA values.
# Compute, for each scene/condition/time-window cell, the distribution
# of clicked referents (the listeners' belief distribution).
cd <- clickdat %>%
  filter(Condition %in% c("Contrast", "NoContrast")) %>%
  select(SceneID, Condition, Answer.choicePrior, Answer.choiceAdj, Answer.choiceWhole) %>%
  gather(Window, Region, -SceneID, -Condition) %>%
  mutate(Region = tolower(Region), Condition = tolower(Condition)) %>%
  mutate(Window = recode(Window,
                         Answer.choicePrior = "prior",
                         Answer.choiceAdj = "adjective",
                         Answer.choiceWhole = "noun")) %>%
  group_by(SceneID, Condition, Window, Region) %>%
  summarise(n = n()) %>%
  # summarise() drops only the innermost grouping level (Region), so
  # freq is the proportion of clicks within each scene/condition/window.
  mutate(freq = n / sum(n)) %>%
  # Drop the remaining grouping so it doesn't leak into downstream joins.
  ungroup() %>%
  rename(condition = Condition, itemid = SceneID, prop.click = freq, freq.click = n)
## Warning: attributes are not identical across measure variables; they will
## be dropped
# Merge eye and click data. Rows present in the eye data but missing
# from the click data get zero click probability/frequency; rows with
# no eye data are dropped.
ecd <- ed %>%
  full_join(cd, by = c("condition", "itemid", "Window", "Region")) %>%
  filter(!is.na(prop.eye)) %>%
  replace_na(list(prop.click = 0, freq.click = 0))
## Warning: Column `condition` joining factor and character vector, coercing
## into character vector
## Warning: Column `Window` joining factor and character vector, coercing into
## character vector
## Warning: Column `Region` joining factor and character vector, coercing into
## character vector
Reformat production data. Add small (.000001) smoothing probability to each probability to allow for computing non-infinite surprisal values.
# Harmonize column names / condition coding with the eye and click data.
prod <- proddat %>%
  rename(condition = Condition, itemid = SceneID) %>%
  mutate(condition = tolower(condition))

# Add the smoothing constant to the ten production-probability columns
# by NAME rather than by positional index (previously columns 8:17),
# which would silently break if the input file's column order changed.
probcols <- c("ExactFirstWordUniPrior", "InitialMatchUniPrior",
              "MorphemeIncludedUniPrior", "SynFirstUniPrior",
              "SynIncludedUniPrior", "ExactFirstWordEmpPrior",
              "InitialMatchEmpPrior", "MorphemeIncludedEmpPrior",
              "SynFirstEmpPrior", "SynIncludedEmpPrior")
prod[, probcols] <- prod[, probcols] + .000001

# Surprisal = -log(probability), computed for each smoothed measure.
prod <- prod %>%
  mutate(surpExactFirstWordUniPrior = -log(ExactFirstWordUniPrior),
         surpInitialMatchUniPrior = -log(InitialMatchUniPrior),
         surpMorphemeIncludedUniPrior = -log(MorphemeIncludedUniPrior),
         surpSynFirstUniPrior = -log(SynFirstUniPrior),
         surpSynIncludedUniPrior = -log(SynIncludedUniPrior),
         surpExactFirstWordEmpPrior = -log(ExactFirstWordEmpPrior),
         surpInitialMatchEmpPrior = -log(InitialMatchEmpPrior),
         surpMorphemeIncludedEmpPrior = -log(MorphemeIncludedEmpPrior),
         surpSynFirstEmpPrior = -log(SynFirstEmpPrior),
         surpSynIncludedEmpPrior = -log(SynIncludedEmpPrior))
Join the production dataset with the eye and click data.
# Combine the production estimates with the merged eye/click data,
# dropping rows without Region information (items with production data
# but no eye/click observations).
fulld <- ecd %>%
  full_join(prod, by = c("condition", "itemid")) %>%
  filter(!is.na(Region)) %>%
  droplevels()
# Sanity check: row count of the merged dataset.
nrow(fulld)
## [1] 675
# Inspect the first few merged rows.
head(fulld)
## condition itemid Window Region freq.eye prop.eye freq.click
## 1 contrast 23 adjective competitor 27 0.19285714 11
## 2 contrast 23 adjective distractor 46 0.32857143 1
## 3 contrast 23 adjective contrast 39 0.27857143 0
## 4 contrast 23 adjective target 28 0.20000000 38
## 5 contrast 23 noun competitor 22 0.14012739 0
## 6 contrast 23 noun distractor 4 0.02547771 0
## prop.click Input.List TargetNoun TargetAdjective TargetColor
## 1 0.22 TlessC1 square big red
## 2 0.02 TlessC1 square big red
## 3 0.00 TlessC1 square big red
## 4 0.76 TlessC1 square big red
## 5 0.00 TlessC1 square big red
## 6 0.00 TlessC1 square big red
## AdjectiveType ExactFirstWordUniPrior InitialMatchUniPrior
## 1 Rel 0.05263258 0.1315799
## 2 Rel 0.05263258 0.1315799
## 3 Rel 0.05263258 0.1315799
## 4 Rel 0.05263258 0.1315799
## 5 Rel 0.05263258 0.1315799
## 6 Rel 0.05263258 0.1315799
## MorphemeIncludedUniPrior SynFirstUniPrior SynIncludedUniPrior
## 1 0.1403519 0.2368431 0.245615
## 2 0.1403519 0.2368431 0.245615
## 3 0.1403519 0.2368431 0.245615
## 4 0.1403519 0.2368431 0.245615
## 5 0.1403519 0.2368431 0.245615
## 6 0.1403519 0.2368431 0.245615
## ExactFirstWordEmpPrior InitialMatchEmpPrior MorphemeIncludedEmpPrior
## 1 0.04363736 0.1090919 0.1163646
## 2 0.04363736 0.1090919 0.1163646
## 3 0.04363736 0.1090919 0.1163646
## 4 0.04363736 0.1090919 0.1163646
## 5 0.04363736 0.1090919 0.1163646
## 6 0.04363736 0.1090919 0.1163646
## SynFirstEmpPrior SynIncludedEmpPrior surpExactFirstWordUniPrior
## 1 0.1963646 0.2036374 2.94442
## 2 0.1963646 0.2036374 2.94442
## 3 0.1963646 0.2036374 2.94442
## 4 0.1963646 0.2036374 2.94442
## 5 0.1963646 0.2036374 2.94442
## 6 0.1963646 0.2036374 2.94442
## surpInitialMatchUniPrior surpMorphemeIncludedUniPrior
## 1 2.028141 1.963603
## 2 2.028141 1.963603
## 3 2.028141 1.963603
## 4 2.028141 1.963603
## 5 2.028141 1.963603
## 6 2.028141 1.963603
## surpSynFirstUniPrior surpSynIncludedUniPrior surpExactFirstWordEmpPrior
## 1 1.440357 1.40399 3.131842
## 2 1.440357 1.40399 3.131842
## 3 1.440357 1.40399 3.131842
## 4 1.440357 1.40399 3.131842
## 5 1.440357 1.40399 3.131842
## 6 1.440357 1.40399 3.131842
## surpInitialMatchEmpPrior surpMorphemeIncludedEmpPrior
## 1 2.215565 2.151027
## 2 2.215565 2.151027
## 3 2.215565 2.151027
## 4 2.215565 2.151027
## 5 2.215565 2.151027
## 6 2.215565 2.151027
## surpSynFirstEmpPrior surpSynIncludedEmpPrior
## 1 1.627782 1.591414
## 2 1.627782 1.591414
## 3 1.627782 1.591414
## 4 1.627782 1.591414
## 5 1.627782 1.591414
## 6 1.627782 1.591414
# Column-wise distributional summaries of the merged dataset.
summary(fulld)
## condition itemid Window Region
## Length:675 Min. :11.00 Length:675 Length:675
## Class :character 1st Qu.:18.00 Class :character Class :character
## Mode :character Median :25.00 Mode :character Mode :character
## Mean :25.36
## 3rd Qu.:33.00
## Max. :40.00
##
## freq.eye prop.eye freq.click prop.click
## Min. : 1.00 Min. :0.003333 Min. : 0.00 Min. :0.0000
## 1st Qu.: 20.00 1st Qu.:0.147252 1st Qu.: 1.00 1st Qu.:0.0200
## Median : 39.00 Median :0.235294 Median :10.00 Median :0.2000
## Mean : 51.19 Mean :0.266667 Mean :13.04 Mean :0.2609
## 3rd Qu.: 74.00 3rd Qu.:0.363191 3rd Qu.:18.00 3rd Qu.:0.3600
## Max. :198.00 Max. :0.909091 Max. :50.00 Max. :1.0000
##
## Input.List TargetNoun TargetAdjective TargetColor
## TlessC1:337 line :212 short : 57 blue :178
## TlessC2:338 cylinder: 94 flat : 48 green :113
## triangle: 71 small : 47 red :174
## cube : 47 big : 46 transparent: 46
## oval : 47 empty : 46 yellow :164
## spiral : 46 tall : 46
## (Other) :158 (Other):385
## AdjectiveType ExactFirstWordUniPrior InitialMatchUniPrior
## Max:229 Min. :0.000001 Min. :0.000001
## Rel:446 1st Qu.:0.000001 1st Qu.:0.000001
## Median :0.008773 Median :0.008773
## Mean :0.026986 Mean :0.050311
## 3rd Qu.:0.050001 3rd Qu.:0.075001
## Max. :0.125001 Max. :0.258334
##
## MorphemeIncludedUniPrior SynFirstUniPrior SynIncludedUniPrior
## Min. :0.000001 Min. :0.000001 Min. :0.000001
## 1st Qu.:0.000001 1st Qu.:0.000001 1st Qu.:0.000001
## Median :0.016668 Median :0.016668 Median :0.017545
## Mean :0.059973 Mean :0.065449 Mean :0.076376
## 3rd Qu.:0.131580 3rd Qu.:0.125001 3rd Qu.:0.170834
## Max. :0.258334 Max. :0.258334 Max. :0.258334
##
## ExactFirstWordEmpPrior InitialMatchEmpPrior MorphemeIncludedEmpPrior
## Min. :0.000001 Min. :0.000001 Min. :0.000001
## 1st Qu.:0.000001 1st Qu.:0.000001 1st Qu.:0.000001
## Median :0.008463 Median :0.008572 Median :0.012728
## Mean :0.023731 Mean :0.044408 Mean :0.054111
## 3rd Qu.:0.038371 3rd Qu.:0.095173 3rd Qu.:0.111208
## Max. :0.128227 Max. :0.281617 Max. :0.281617
##
## SynFirstEmpPrior SynIncludedEmpPrior surpExactFirstWordUniPrior
## Min. :0.000001 Min. :0.000001 Min. : 2.079
## 1st Qu.:0.000001 1st Qu.:0.000001 1st Qu.: 2.996
## Median :0.011236 Median :0.018890 Median : 4.736
## Mean :0.061231 Mean :0.072547 Mean : 7.601
## 3rd Qu.:0.129232 3rd Qu.:0.138462 3rd Qu.:13.816
## Max. :0.295220 Max. :0.334001 Max. :13.816
##
## surpInitialMatchUniPrior surpMorphemeIncludedUniPrior
## Min. : 1.354 Min. : 1.354
## 1st Qu.: 2.590 1st Qu.: 2.028
## Median : 4.736 Median : 4.094
## Mean : 7.286 Mean : 6.662
## 3rd Qu.:13.816 3rd Qu.:13.816
## Max. :13.816 Max. :13.816
##
## surpSynFirstUniPrior surpSynIncludedUniPrior surpExactFirstWordEmpPrior
## Min. : 1.354 Min. : 1.354 Min. : 2.054
## 1st Qu.: 2.079 1st Qu.: 1.767 1st Qu.: 3.270
## Median : 4.094 Median : 4.043 Median : 4.772
## Mean : 7.106 Mean : 6.156 Mean : 7.663
## 3rd Qu.:13.816 3rd Qu.:13.816 3rd Qu.:13.816
## Max. :13.816 Max. :13.816 Max. :13.816
##
## surpInitialMatchEmpPrior surpMorphemeIncludedEmpPrior
## Min. : 1.267 Min. : 1.267
## 1st Qu.: 2.352 1st Qu.: 2.196
## Median : 4.759 Median : 4.364
## Mean : 7.359 Mean : 6.741
## 3rd Qu.:13.816 3rd Qu.:13.816
## Max. :13.816 Max. :13.816
##
## surpSynFirstEmpPrior surpSynIncludedEmpPrior
## Min. : 1.220 Min. : 1.097
## 1st Qu.: 2.046 1st Qu.: 1.977
## Median : 4.489 Median : 3.969
## Mean : 7.179 Mean : 6.224
## 3rd Qu.:13.816 3rd Qu.:13.816
## Max. :13.816 Max. :13.816
##
Start by computing the correlation between the pairwise production probability estimates computed on the uniform versus empirical prior. Generally, the correlation between the different ways of estimating probs (pairwise) is high (>.9).
# For each of the five linking hypotheses, correlate the uniform-prior
# and empirical-prior probability estimates and plot them against each
# other (dashed line = identity). Note cor() is symmetric, so the
# inconsistent argument order across calls is harmless.
cor(prod$ExactFirstWordEmpPrior,prod$ExactFirstWordUniPrior)
## [1] 0.9469788
ggplot(prod, aes(x=ExactFirstWordUniPrior, y=ExactFirstWordEmpPrior, color=TargetAdjective)) +
geom_point() +
xlim(0,.2) +
ylim(0,.2) +
geom_abline(intercept=0, slope=1, linetype="dashed", color="gray60")
cor(prod$InitialMatchUniPrior,prod$InitialMatchEmpPrior)
## [1] 0.9326798
ggplot(prod, aes(x=InitialMatchUniPrior, y=InitialMatchEmpPrior, color=TargetAdjective)) +
geom_point() +
xlim(0,.3) +
ylim(0,.3) +
geom_abline(intercept=0, slope=1, linetype="dashed", color="gray60")
cor(prod$MorphemeIncludedUniPrior,prod$MorphemeIncludedEmpPrior)
## [1] 0.9210687
ggplot(prod, aes(x=MorphemeIncludedUniPrior, y=MorphemeIncludedEmpPrior, color=TargetAdjective)) +
geom_point() +
xlim(0,.3) +
ylim(0,.3) +
geom_abline(intercept=0, slope=1, linetype="dashed", color="gray60")
cor(prod$SynFirstUniPrior,prod$SynFirstEmpPrior)
## [1] 0.9228942
ggplot(prod, aes(x=SynFirstUniPrior, y=SynFirstEmpPrior, color=TargetAdjective)) +
geom_point() +
xlim(0,.3) +
ylim(0,.3) +
geom_abline(intercept=0, slope=1, linetype="dashed", color="gray60")
cor(prod$SynIncludedUniPrior,prod$SynIncludedEmpPrior)
## [1] 0.9177232
ggplot(prod, aes(x=SynIncludedUniPrior, y=SynIncludedEmpPrior, color=TargetAdjective)) +
geom_point() +
xlim(0,.35) +
ylim(0,.35) +
geom_abline(intercept=0, slope=1, linetype="dashed", color="gray60")
We ultimately want to weight the empirical and the backoff (uniform) prior by beta and 1-beta, respectively. How to determine beta?
In principle we can consider both the probability and the surprisal of the target adjective as determining this weight in some way. The easiest way would be to take probabilities directly. However, given that production probabilities are generally low (ie lower than .4), this would yield a very strong bias towards uniform, which is clearly not right. For the sake of making things as comparable as possible, I’m therefore rescaling both the probabilities and the surprisals to fall in the interval [0,1], and am further inverting the scale for surprisal values so a greater transformed value means a greater expectation for the adjective, ie more reliance on empirical beliefs when weighting.
# Names of the raw probability measures; every other measure in the
# long-format data below is a surprisal (prefixed with "surp").
probmeasures <- c("ExactFirstWordEmpPrior", "ExactFirstWordUniPrior",
                  "InitialMatchUniPrior", "InitialMatchEmpPrior",
                  "MorphemeIncludedEmpPrior", "MorphemeIncludedUniPrior",
                  "SynFirstEmpPrior", "SynFirstUniPrior",
                  "SynIncludedEmpPrior", "SynIncludedUniPrior")

# Long format: one row per item/condition/measure. Rescale each measure
# to [0,1] within measure, then invert the scale for surprisal measures
# so that larger rescaled values always mean a greater expectation for
# the adjective (i.e. more reliance on empirical beliefs).
gprod <- prod[, c(2, 3, 5, 7:27)] %>%
  gather(Measure, Value, -AdjectiveType, -itemid, -condition, -TargetAdjective) %>%
  group_by(Measure) %>%
  mutate(RescaledWeight = rescale(Value)) %>%
  ungroup() %>%
  mutate(MeasureType = ifelse(Measure %in% probmeasures, "probability", "surprisal"),
         Prior = gsub('^surp', "", Measure, perl = TRUE),
         # Vectorized replacement for the earlier base-R sub-assignment:
         # flip the scale for surprisal measures only.
         RescaledWeight = ifelse(MeasureType == "surprisal",
                                 1 - RescaledWeight, RescaledWeight))
To get a sense of the distribution of (rescaled) adjective expectations:
# Density of the rescaled adjective expectations, one panel per measure.
ggplot(gprod, aes(x = RescaledWeight, color = MeasureType)) +
  theme(legend.position = "top") +
  geom_density() +
  facet_wrap(~ Measure, scales = "free", nrow = 5)
Is there a difference in mean expectation by adjective type with raw values (probs and surprisals)? Yes. Incidentally, this includes the min adjectives, for which we don’t have the eye movement data. Interestingly, the min adjectives seem to have been overall the most expected of all. This makes it all the more interesting to get our hands on the min adjective eye movement data.
# Mean raw expectation (probability or surprisal) by adjective type and
# prior, with CIs from ci.low()/ci.high() in helpers.R (presumably
# bootstrapped half-widths -- see helpers.R to confirm).
agr <- gprod %>%
  group_by(AdjectiveType, MeasureType, Prior) %>%
  summarise(MeanProductionProbability = mean(Value),
            CILow = ci.low(Value),
            CIHigh = ci.high(Value)) %>%
  mutate(Ymin = MeanProductionProbability - CILow,
         Ymax = MeanProductionProbability + CIHigh)

ggplot(agr, aes(x = AdjectiveType, y = MeanProductionProbability)) +
  geom_bar(stat = "identity") +
  geom_errorbar(aes(ymin = Ymin, ymax = Ymax), width = .25) +
  facet_grid(MeasureType ~ Prior, scales = "free")
The same thing with rescaled adjective expectations between 0 and 1 (ie probs and surprisals projected into [0,1] interval).
# Same comparison, but on the rescaled [0,1] adjective expectations.
agr <- gprod %>%
  group_by(AdjectiveType, MeasureType, Prior) %>%
  summarise(MeanProductionProbability = mean(RescaledWeight),
            CILow = ci.low(RescaledWeight),
            CIHigh = ci.high(RescaledWeight)) %>%
  mutate(Ymin = MeanProductionProbability - CILow,
         Ymax = MeanProductionProbability + CIHigh)

ggplot(agr, aes(x = AdjectiveType, y = MeanProductionProbability)) +
  geom_bar(stat = "identity") +
  xlab("Mean rescaled probability / surprisal of adjective") +
  geom_errorbar(aes(ymin = Ymin, ymax = Ymax), width = .25) +
  facet_grid(MeasureType ~ Prior, scales = "free")
What we’re testing: is the proportion of looks in the adjective better explained by raw referent probability (as estimated from click data), or by a weighting between the empirical belief distribution and a backoff prior (currently assumed to be uniform). Let’s see.
First, the overall correlations of click and eye data in different windows as a baseline:
# Baseline: overall correlation between click and eye proportions,
# computed within each time window.
fulld %>%
  group_by(Window) %>%
  summarise(Cor = cor(prop.click, prop.eye))
Correlations of click and eye data in different windows, separately for max and rel adjectives:
# Same baseline correlations, split by adjective type (max vs. rel).
fulld %>%
  group_by(AdjectiveType, Window) %>%
  summarise(Cor = cor(prop.click, prop.eye))
## # A tibble: 6 x 3
## # Groups: AdjectiveType [?]
## AdjectiveType Window Cor
## <fctr> <chr> <dbl>
## 1 Max adjective 0.24557877
## 2 Max noun 0.84387762
## 3 Max prior 0.10444631
## 4 Rel adjective 0.50187791
## 5 Rel noun 0.76609604
## 6 Rel prior -0.02251489
Plots of the eye data against the click data.
# Eye proportions against click proportions, with identity line and a
# single overall linear fit per panel.
ggplot(fulld, aes(x = prop.click, y = prop.eye, color = Region)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray40") +
  geom_point() +
  geom_smooth(method = "lm", aes(group = 1)) +
  facet_grid(Window ~ AdjectiveType)
# Same plot, with each point labeled by its target adjective.
ggplot(fulld, aes(x = prop.click, y = prop.eye, color = Region)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray40") +
  geom_point() +
  geom_text(aes(label = TargetAdjective), size = 2.5, color = "black") +
  geom_smooth(method = "lm", aes(group = 1)) +
  facet_grid(Window ~ AdjectiveType)
Join the rescaled production expectations and the eye / click data (looking only at the adjective window, ie getting rid of the noun and prior window eye and click data). Compute the predicted proportion of looks per region by mixing empirical (prop.click) and uniform (.25) probabilities according to rescaled weight derived from adjective production expectations.
# Restrict to the adjective window, join in the rescaled production
# expectations, and compute the predicted proportion of looks as a
# mixture of the empirical click distribution (prop.click) and a
# uniform backoff prior (.25), weighted by RescaledWeight.
#
# The mixture is computed with vectorized arithmetic instead of
# rowwise() + weighted.mean(): because the two weights always sum to 1,
# w * p + (1 - w) * .25 is exactly the same weighted mean (including NA
# propagation), and dropping rowwise() avoids the "Grouping rowwise
# data frame strips rowwise nature" warning when testd is grouped later.
testd <- fulld %>%
  filter(Window == "adjective") %>%
  select(condition, itemid, Region, prop.eye, prop.click, TargetAdjective) %>%
  full_join(gprod, by = c("itemid", "condition", "TargetAdjective")) %>%
  mutate(predicted.prop.eye = RescaledWeight * prop.click +
           (1 - RescaledWeight) * 0.25) %>%
  filter(!is.na(predicted.prop.eye) & !is.na(prop.click)) %>%
  droplevels()
## Warning: Column `TargetAdjective` joining factors with different levels,
## coercing to character vector
The money plot: predicted data against eye data. Do any of these do better than the baseline above? Nope :(
# Correlate predicted and observed looking proportions, separately per
# adjective type and linking measure, sorted from worst to best.
results = testd %>%
group_by(AdjectiveType,Measure) %>%
summarize(Correlation=cor(prop.eye,predicted.prop.eye)) %>%
arrange(Correlation)
## Warning: Grouping rowwise data frame strips rowwise nature
# Print all 40 (adjective type x measure) correlations.
results
## # A tibble: 40 x 3
## # Groups: AdjectiveType [2]
## AdjectiveType Measure Correlation
## <fctr> <chr> <dbl>
## 1 Max ExactFirstWordUniPrior 0.1857383
## 2 Max ExactFirstWordEmpPrior 0.1892968
## 3 Max InitialMatchEmpPrior 0.1899488
## 4 Max SynFirstEmpPrior 0.1924734
## 5 Max InitialMatchUniPrior 0.1953220
## 6 Max SynFirstUniPrior 0.1987355
## 7 Max SynIncludedEmpPrior 0.2115141
## 8 Max SynIncludedUniPrior 0.2240578
## 9 Max MorphemeIncludedEmpPrior 0.2265207
## 10 Max MorphemeIncludedUniPrior 0.2353699
## # ... with 30 more rows
# Predicted vs. observed looking proportions, one panel per linking
# measure and adjective type (dashed line = identity).
ggplot(testd, aes(x = predicted.prop.eye, y = prop.eye, color = Region)) +
  geom_abline(intercept = 0, slope = 1, linetype = "dashed", color = "gray40") +
  geom_point() +
  xlim(0, 1) +
  ylim(0, 1) +
  geom_smooth(method = "lm", aes(group = 1)) +
  facet_grid(Measure ~ AdjectiveType)
Overall, mixing doesn’t appear to improve our ability to predict the eye movement data; if anything, it makes it worse. Visually, this is because of the “band of .25s”, ie the fact that we have lots of cases of adjectives with zero probability, which get assigned an empirical weight of 0 and a uniform weight of 1. So…